Paso 4

Resumen de datos para exportar a revista

Author

Andrés González Santa Cruz

Published

September 28, 2023

Cargar paquetes

Cargar bases de datos

Code
library(DiagrammeR) #⋉
gr_lca2<-
DiagrammeR::grViz("
digraph flowchart {
    fontname='Comic Sans MS'

  # Nodes
  subgraph samelevel {
    CAUSAL [label = 'Causal',fontsize=10,shape = box]
    EDAD_MUJER_REC [label = 'Edad\npersona\ngestante',fontsize=10,shape = box]
    HITO1_EDAD_GEST_SEM_REC [label = 'Edad\nGestacional\nHito 1',fontsize=10,shape = box]
    MACROZONA [label = 'Macrozona',fontsize=10,shape = box]
    PAIS_ORIGEN_REC [label = 'País de\norigen',fontsize=10,shape = box]
    PREV_TRAMO_REC [label = 'Previsión y\ntramo',fontsize=10,shape = box]
    
  {rank=same; rankdir= 'TB'; CAUSAL EDAD_MUJER_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PAIS_ORIGEN_REC PREV_TRAMO_REC }
  }
  LCA [label= 'Clases\nlatentes', shape= circle, style=filled, color=lightgrey, fontsize=10]
  
  inter [label = 'Interupción\nembarazo',fontsize=10,shape = box, height=.00002] # set the position of the inter node pos='15,100'

  # Nodes
  subgraph {
   LCA ->  {CAUSAL EDAD_MUJER_REC HITO1_EDAD_GEST_SEM_REC MACROZONA PAIS_ORIGEN_REC PREV_TRAMO_REC } [rank=same; rankdir= 'TB'] 
}
  subgraph {
   LCA -> inter [minlen=14] #minlen es necesario para correr arrowhead = none; 
  {rank=same; LCA inter [rankdir='LR']}; #; 
}
}")#, width = 1200, height = 900
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/

gr_lca2 

Gráfico esquemático

Code
DPI = 1200
WidthCM = 21
HeightCM = 8

gr_lca2 %>%
  export_svg %>% charToRaw %>% rsvg::rsvg_pdf("_flowchart_lca_adj_sin_po_ano.pdf")

gr_lca2 %>% export_svg()%>%charToRaw %>% rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("_flowchart_lca0_adj_sin_po.png")

htmlwidgets::saveWidget(gr_lca2, "_flowchart_lca_adj_sin_po_ano.html")
#webshot::webshot("_flowchart_lca_adj_ano.html", "_flowchart_lca_adj_sin_po_ano.png",vwidth = 1200, vheight = 900, zoom = 2)

Análisis de clases latentes, modelos definitivos, sin pueblo originario y año

Code
#rm(list = ls());gc()
load("data2_lca3_sin_po_ano_2023_05_14.RData") #Paso 213 y 214
#variables_probabilities_in_category_sin_po_ano.xlsx
#variables_probabilities_in_category_glca_adj_sin_po.xlsxbootlrt
require(sjPlot)
require(tidyverse)
require(tableone)

manualcolors<- c(paste0("gray",seq(20,80, by=20)))
fig_lca_fit<- tab_ppio %>%
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -ModelIndex, #"evryone but index"
                      names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels, labels = labels)) %>%
  dplyr::filter(grepl("(BIC)",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(ModelIndex, levels=2:n_class_max)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases", y="Valor", color="Medida", linetype="Medida")+
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  sjPlot::theme_sjplot()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
i Please use `linewidth` instead.
Code
fig_lca_fit

Índices de ajuste, modelo sin covariable
Code
ggsave("__fig2_medidas_de_ajuste_polca_sin_ano_pueb_orig.pdf",fig_lca_fit)
Saving 7 x 5 in image
Code
manualcolors2<- c(paste0("gray",seq(20,80, by=20)))
fig_lca_fit2<- tab_ppio2 %>%
  dplyr::mutate_if(is.character, as.numeric) %>%  # convert character columns to numeric
  tidyr::pivot_longer(cols = -ModelIndex, #"evryone but index"
                       names_to = "indices", values_to = "value", values_drop_na = F) %>%
  dplyr::mutate(indices = factor(indices, levels = levels2, labels = labels2)) %>%
  dplyr::filter(grepl("BIC",indices, ignore.case=T))%>%
  dplyr::mutate(ModelIndex= factor(ModelIndex, levels=2:n_class_max)) %>% 
  ggplot(aes(x = ModelIndex, y = value, group = indices, color = indices, linetype = indices)) +
  geom_line(size = 1.5) +
  scale_color_manual(values = manualcolors2) +
  #scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  labs(x = "Número de clases", y="Valor", color="Medida",linetype="Medida")+
  #facet_wrap(.~indices, scales = "free_y", nrow = 4, ncol = 1) +
  sjPlot::theme_sjplot()

fig_lca_fit2

Índices de ajuste, modelo variable distal
Code
ggsave("__fig3_medidas_de_ajuste_polca_sin_ano_pueb_orig_adj.pdf",fig_lca_fit2)
Saving 7 x 5 in image
Code
table_homolog<-
cbind.data.frame(
Name = c("n", "CAUSAL", "CAUSAL", "CAUSAL", "EDAD_MUJER_REC", 
                        "EDAD_MUJER_REC", "EDAD_MUJER_REC", "EDAD_MUJER_REC", "EDAD_MUJER_REC", 
                        "PAIS_ORIGEN_REC", "PAIS_ORIGEN_REC", "PAIS_ORIGEN_REC", "HITO1_EDAD_GEST_SEM_REC", 
                        "HITO1_EDAD_GEST_SEM_REC", "HITO1_EDAD_GEST_SEM_REC", "HITO1_EDAD_GEST_SEM_REC", 
                        "MACROZONA", "MACROZONA", "MACROZONA", "MACROZONA", "MACROZONA", 
                        "MACROZONA", "AÑO", "AÑO", "AÑO", "AÑO", "AÑO", "PREV_TRAMO_REC", 
                        "PREV_TRAMO_REC", "PREV_TRAMO_REC", "PREV_TRAMO_REC", "PREV_TRAMO_REC", 
                        "HITO2_DECISION_MUJER_IVE", "HITO2_DECISION_MUJER_IVE", "HITO2_DECISION_MUJER_IVE","HITO4_MUJER_ACEPTA_ACOM","HITO4_MUJER_ACEPTA_ACOM","HITO4_MUJER_ACEPTA_ACOM"),
level = c("", "2", "3", "4", "1", "2", "3", "4", "5", "1", 
          "2", "3", "1", "2", "3", "4", "1", "2", "3", "4", "5", "6", "2", 
          "3", "4", "5", "6", "1", "2", "3", "4", "5", "CONTINUAR EL EMBARAZO", 
          "INTERRUMPIR EL EMBARAZO", "NO APLICA, INSCONSCIENTE","NO", "SI", "NA"),
cat = c("", "Causal 1", "Causal 2", "Causal 3", "[Perdidos]", "1. <18", "2. 18-24", "3. 25-35", "4. >=35", 
          "[Perdidos]", "Chile", "Otros", "[Perdidos]", "1. 0-13 semanas", "2. 14-27 semanas", "3. >=28 semanas", 
          "[Perdidos]", "Centro", "Centro Norte", "Centro Sur", "Norte", "Sur", "2018", "2019", "2020", "2021", "2022", 
          "[Perdidos]", "ISAPRE o FFAA", "FONASA A/B", "FONASA C/D", "NINGUNA", "CONTINUAR EL EMBARAZO", "INTERRUMPIR EL EMBARAZO", "NO APLICA, INSCONSCIENTE", "NO", "SI", "[Perdidos]")
)
run_tableone(listVars =c("CAUSAL", "EDAD_MUJER_REC", "PAIS_ORIGEN_REC", 
                         "HITO1_EDAD_GEST_SEM_REC","MACROZONA", "AÑO", "PREV_TRAMO_REC",
                         "HITO2_DECISION_MUJER_IVE","HITO4_MUJER_ACEPTA_ACOM"), df= cbind.data.frame(mydata_preds3,HITO4_MUJER_ACEPTA_ACOM=mydata_preds$HITO4_MUJER_ACEPTA_ACOM), 
             catVars= c("CAUSAL", "EDAD_MUJER_REC", "PAIS_ORIGEN_REC", 
                        "HITO1_EDAD_GEST_SEM_REC","MACROZONA", "AÑO", "PREV_TRAMO_REC", "HITO4_MUJER_ACEPTA_ACOM"),
             strata= "outcome") %>%  
  dplyr::left_join(table_homolog, by= c("Name"="Name","level"="level")) %>% 
  dplyr::select(Name, cat, everything()) %>% 
  dplyr::select(-level) %>% 
  knitr::kable("markdown", caption="Descriptivos (acotado)")
Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
i Please use `tibble::rownames_to_column()` instead.
Descriptivos (acotado)
Name cat Overall 0 1 p test
n 3789 606 3183
CAUSAL Causal 1 1171 (30.9) 203 (33.5) 968 ( 30.4) <0.001
CAUSAL Causal 2 1887 (49.8) 346 (57.1) 1541 ( 48.4)
CAUSAL Causal 3 731 (19.3) 57 ( 9.4) 674 ( 21.2)
EDAD_MUJER_REC [Perdidos] 18 ( 0.5) 2 ( 0.3) 16 ( 0.5) 0.024
EDAD_MUJER_REC 1. <18 269 ( 7.1) 55 ( 9.1) 214 ( 6.7)
EDAD_MUJER_REC 2. 18-24 720 (19.0) 102 (16.8) 618 ( 19.4)
EDAD_MUJER_REC 3. 25-35 1646 (43.4) 243 (40.1) 1403 ( 44.1)
EDAD_MUJER_REC 4. >=35 1136 (30.0) 204 (33.7) 932 ( 29.3)
PAIS_ORIGEN_REC [Perdidos] 18 ( 0.5) 5 ( 0.8) 13 ( 0.4) 0.005
PAIS_ORIGEN_REC Chile 3091 (81.6) 518 (85.5) 2573 ( 80.8)
PAIS_ORIGEN_REC Otros 680 (17.9) 83 (13.7) 597 ( 18.8)
HITO1_EDAD_GEST_SEM_REC [Perdidos] 87 ( 2.3) 12 ( 2.0) 75 ( 2.4) <0.001
HITO1_EDAD_GEST_SEM_REC 1. 0-13 semanas 1328 (35.0) 128 (21.1) 1200 ( 37.7)
HITO1_EDAD_GEST_SEM_REC 2. 14-27 semanas 2008 (53.0) 343 (56.6) 1665 ( 52.3)
HITO1_EDAD_GEST_SEM_REC 3. >=28 semanas 366 ( 9.7) 123 (20.3) 243 ( 7.6)
MACROZONA [Perdidos] 11 ( 0.3) 3 ( 0.5) 8 ( 0.3) <0.001
MACROZONA Centro 1608 (42.4) 198 (32.7) 1410 ( 44.3)
MACROZONA Centro Norte 610 (16.1) 74 (12.2) 536 ( 16.8)
MACROZONA Centro Sur 555 (14.6) 154 (25.4) 401 ( 12.6)
MACROZONA Norte 431 (11.4) 70 (11.6) 361 ( 11.3)
MACROZONA Sur 574 (15.1) 107 (17.7) 467 ( 14.7)
AÑO 2018 732 (19.3) 115 (19.0) 617 ( 19.4) 0.306
AÑO 2019 818 (21.6) 149 (24.6) 669 ( 21.0)
AÑO 2020 662 (17.5) 103 (17.0) 559 ( 17.6)
AÑO 2021 820 (21.6) 131 (21.6) 689 ( 21.6)
AÑO 2022 757 (20.0) 108 (17.8) 649 ( 20.4)
PREV_TRAMO_REC [Perdidos] 14 ( 0.4) 4 ( 0.7) 10 ( 0.3) <0.001
PREV_TRAMO_REC ISAPRE o FFAA 488 (12.9) 37 ( 6.1) 451 ( 14.2)
PREV_TRAMO_REC FONASA A/B 2096 (55.3) 382 (63.0) 1714 ( 53.8)
PREV_TRAMO_REC FONASA C/D 1092 (28.8) 179 (29.5) 913 ( 28.7)
PREV_TRAMO_REC NINGUNA 99 ( 2.6) 4 ( 0.7) 95 ( 3.0)
HITO2_DECISION_MUJER_IVE CONTINUAR EL EMBARAZO 593 (15.7) 593 (97.9) 0 ( 0.0) <0.001
HITO2_DECISION_MUJER_IVE INTERRUMPIR EL EMBARAZO 3183 (84.0) 0 ( 0.0) 3183 (100.0)
HITO2_DECISION_MUJER_IVE NO APLICA, INSCONSCIENTE 13 ( 0.3) 13 ( 2.1) 0 ( 0.0)
HITO4_MUJER_ACEPTA_ACOM NO 463 (12.2) 93 (15.3) 370 ( 11.6) 0.017
HITO4_MUJER_ACEPTA_ACOM SI 3225 (85.1) 502 (82.8) 2723 ( 85.5)
HITO4_MUJER_ACEPTA_ACOM NA 101 ( 2.7) 11 ( 1.8) 90 ( 2.8)
Code
  #knitr::kable("html", caption="Descriptivos (acotado)") %>% kableExtra::kable_classic()

no_mostrar=1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#if(no_mostrar==0){
  #    dplyr::select(HITO2_DECISION_MUJER_IVE, c("CAUSAL", "EDAD_MUJER_REC", "PAIS_ORIGEN_REC", "HITO1_EDAD_GEST_SEM_REC","MACROZONA", "AÑO", "PREV_TRAMO_REC")) %>% 
  chisquare_test<-
  mydata_preds3%>% 
    dplyr::select( 
      c("CAUSAL", "EDAD_MUJER_REC", "PAIS_ORIGEN_REC", 
        "HITO1_EDAD_GEST_SEM_REC","MACROZONA", "PREV_TRAMO_REC", "outcome")) %>% 
    tidyr::gather(variable,measure, -outcome) %>% 
    dplyr::group_by(variable) %>%
    dplyr::do(cbind.data.frame(broom::tidy(chisq.test(.$outcome, .$measure)),
                               broom::tidy(sum(table(.$outcome, .$measure))))) %>% 
    #casos completos
    #chisq.test(table(Base_fiscalia_v14_filt$motivodeegreso_mod_imp_rec, Base_fiscalia_v14_filt$sus_principal_mod, exclude=NULL))
    dplyr::mutate(p.value=ifelse(p.value<.001,"<0.001",sprintf("%1.3f",p.value))) %>% 
    dplyr::mutate(statistic=sprintf("%2.0f",statistic)) %>% 
    dplyr::mutate(report=paste0("X²(",parameter,", ",x,")=",statistic,"; p", ifelse(p.value=="<0.001",p.value, paste0("=",p.value)))) %>%
    dplyr::mutate(report=sub("0\\.","0,",sub("\\.",",",report)))
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning in chisq.test(.$outcome, .$measure): Chi-squared approximation may be
incorrect
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")

Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning in chisq.test(.$outcome, .$measure): Chi-squared approximation may be
incorrect
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning in chisq.test(.$outcome, .$measure): Chi-squared approximation may be
incorrect
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning in chisq.test(.$outcome, .$measure): Chi-squared approximation may be
incorrect
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Code
  # chisq.test(Base_fiscalia_v14_filt$motivodeegreso_mod_imp_rec, is.na(Base_fiscalia_v14_filt$offender_d)) 
  # chisq.test(Base_fiscalia_v14_filt$motivodeegreso_mod_imp_rec, is.na(Base_fiscalia_v14_filt$offender_d)) 
  # 
  fisher_test<-
  mydata_preds3 %>% 
    dplyr::select( 
    c("CAUSAL", "EDAD_MUJER_REC", "PREV_TRAMO_REC", "MACROZONA", "outcome"))%>% 
    tidyr::gather(variable,measure, -outcome) %>% 
    dplyr::group_by(variable) %>%
    dplyr::do(fisher.test(table(.$outcome, .$measure, exclude=NULL), 
                          workspace = 2e10, simulate.p.value = T, B=1e5) %>% 
                broom::tidy()) 

  #EDAD_MUJER  HITO1_EDAD_GESTACIONAL_SEMANAS  
  med_iqr <-
rbind.data.frame(medicion=
paste0(
  "Edad persona gestante: ",
  quantile(data2$EDAD_MUJER,.5, na.rm=T), "[",
  quantile(data2$EDAD_MUJER,.25, na.rm=T),", ",
  quantile(data2$EDAD_MUJER,.75, na.rm=T),"]"
  ), medicion=
paste0(
  "Edad persona gestante (IVE): ",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==1],.5, na.rm=T), "[",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==1],.25, na.rm=T),", ",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==1],.75, na.rm=T),"]"
  ),
medicion=paste0(
  "Edad persona gestante (no IVE): ",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==0],.5, na.rm=T), "[",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==0],.25, na.rm=T),", ",
  quantile(data2$EDAD_MUJER[mydata_preds3$outcome==0],.75, na.rm=T),"]"
  ),
medicion=paste0(
  "Edad gestacional en semanas: ",
  quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS,.5, na.rm=T), "[", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS,.25, na.rm=T),", ", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS,.75, na.rm=T),"]"),
medicion=paste0(
  "Edad gestacional en semanas (IVE): ",
  quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==1],.5, na.rm=T), "[", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==1],.25, na.rm=T),", ", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==1],.75, na.rm=T),"]"),
medicion=paste0(
  "Edad gestacional en semanas (no IVE): ",
  quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==0],.5, na.rm=T), "[", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==0],.25, na.rm=T),", ", quantile(data2$HITO1_EDAD_GESTACIONAL_SEMANAS[mydata_preds3$outcome==0],.75, na.rm=T),"]" )
)
  colnames(med_iqr)<-"medicion"
  
  # Kurksal Wallis
  kruskal<-
  rbind.data.frame(
  name=paste0("Edad persona gestante (continua): H(",kruskal.test(data2$EDAD_MUJER ~ mydata_preds3$outcome)$parameter,")=",
         round(kruskal.test(data2$EDAD_MUJER ~ mydata_preds3$outcome)$statistic,1), ", p=",
         round(kruskal.test(data2$EDAD_MUJER ~ mydata_preds3$outcome)$p.value,3)),
  name=paste0("Edad gestacional (continua): H(",kruskal.test(data2$HITO1_EDAD_GESTACIONAL_SEMANAS ~ mydata_preds3$outcome)$parameter,")=",
         round(kruskal.test(data2$HITO1_EDAD_GESTACIONAL_SEMANAS ~ mydata_preds3$outcome)$statistic,1), ", p=",
         round(kruskal.test(data2$HITO1_EDAD_GESTACIONAL_SEMANAS ~ mydata_preds3$outcome)$p.value,3))
  )
    colnames(kruskal)<-"names"
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  rio::export(list(chisquare_test = chisquare_test, 
                   fisher_test = fisher_test, 
                   kruskal= kruskal,
                   iqr= med_iqr), "tableone_extension.xlsx", overwrite = T)
   
#, error= T}

Figuras (poLCA)

Code
#https://agscl.github.io/IVE/Paso35.html
require(plotly)
#https://github.com/slowkow/ggrepel
lcmodel_wo_na<-
lcmodel %>% 
                 dplyr::filter(!is.na(CATEGORIA)) %>% 
                 dplyr::mutate(Var1=str_replace(Var1,"class","Clase"),
                 L2= dplyr::case_when(L2=="CAUSAL"~ "Causal",
                                      L2=="EDAD_MUJER_REC"~ "Edad persona\ngestante",
                                      L2=="MACROZONA"~ "Macrozona",
                                      L2=="HITO1_EDAD_GEST_SEM_REC"~ "Edad Gestacional\n(EG) Hito 1",
                                      L2=="PREV_TRAMO_REC"~ "Previsión y\ntramo",
                                    L2=="PAIS_ORIGEN_REC"~ "País de origen")) %>%
  dplyr::mutate(CATEGORIA= gsub("1.0-13 semanas","1. <=13 semanas",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO","Centro",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO NORTE","Centro norte",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("CENTRO SUR","Centro sur",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("NORTE","Norte",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("SUR","Sur",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  #
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(lab2=glue::glue("{stringr::str_replace(CATEGORIA,' ','\n')}\n({scales::percent(value)})"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#2023-09-24
lcmodel_wo_na$text_label_pre<-paste0("Categoria:",lcmodel_wo_na$CATEGORIA,"<br>%: ",scales::percent(lcmodel_wo_na$value))

zp1a <- ggplot(lcmodel_wo_na,aes(x = L2, y = value, fill = Var2, label=text_label_pre))
zp1a <- zp1a + geom_bar(stat = "identity", position = "stack")
zp1a <- zp1a + facet_grid(Var1 ~ .) 
zp1a <- zp1a + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
zp1a <- zp1a + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Cateorías de\nRespuesta")
zp1a <- zp1a + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp1a <- zp1a + guides(fill = guide_legend(reverse=TRUE))
zp1a <- zp1a + theme(axis.text.x = element_text(angle = 30, hjust = 1))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

zp1b <- ggplot(data= lcmodel_wo_na, aes(x = L2, y = value, fill = Var2, label=lab2))
zp1b <- zp1b + geom_bar(stat = "identity", position = "stack")
#zp1b <- zp1b + geom_text(position = position_stack(vjust = 0.5), size=3)  # add labels
zp1b <- zp1b + facet_grid(Var1 ~ .) 
zp1b <- zp1b + scale_fill_manual(values=paste0("grey",seq(20,80, by=60/6))) +theme_bw()
zp1b <- zp1b + labs(y = "Probabilidad de respuesta", 
                  x = "",
                  fill ="Cateorías de\nRespuesta")
zp1b <- zp1b + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp1b <- zp1b + guides(fill = guide_legend(reverse=TRUE))
zp1b <- zp1b + theme(axis.text.x = element_text(angle = 30, hjust = 1))

ggsave("zp1.png", zp1b+ggrepel::geom_label_repel(data= dplyr::filter(lcmodel_wo_na,!CATEGORIA=="Perdidos"),#aes(#y=half, label=lab),
      #position = position_dodge(width = .5),    # move to center of bars
              #vjust = 0,    # nudge above top of bar
  #position = position_stack(vjust = 0.5),
            #position = position_dodge(width = .8),
            #vjust = .1,
            position = position_stack(vjust = 0.5),
              size = 3,
            max.iter = 1e6,
            #direction = "y",
            #force=1,
            #seed=123,
            colour = "white", fontface = "bold")+theme(legend.position= "none"), height=13)#, fill = "white" --> dentro de label repel
Saving 7 x 13 in image
Code
 #ggplotly(zp1a, tooltip = c("text_label"))%>% layout(xaxis= list(showticklabels = T),height=600, width=800) 
ggplotly(zp1a,height=600, width=800)%>% layout(xaxis= list(showticklabel=T))

Distribución de categorías del modelo

Code
zp1b+ggrepel::geom_label_repel(data= dplyr::filter(lcmodel_wo_na,!CATEGORIA=="Perdidos"),#aes(#y=half, label=lab),
      #position = position_dodge(width = .5),    # move to center of bars
              #vjust = 0,    # nudge above top of bar
  #position = position_stack(vjust = 0.5),
            #position = position_dodge(width = .8),
            #vjust = .1,
            position = position_stack(vjust = 0.5),
              size = 3,
            max.iter = 1e6,
            #direction = "y",
            #force=1,
            #seed=123,
            colour = "white", fontface = "bold")+theme(legend.position= "none")

Code
lcmodel_adj_wo_na<-
lcmodel_adj %>% 
                 dplyr::filter(!is.na(CATEGORIA)) %>% 
                 dplyr::mutate(Var1=str_replace(Var1,"class","Clase"),
                 L2= dplyr::case_when(L2=="CAUSAL"~ "Causal",
                                      L2=="EDAD_MUJER_REC"~ "Edad persona\ngestante",
                                      L2=="MACROZONA"~ "Macrozona",
                                      L2=="HITO1_EDAD_GEST_SEM_REC"~ "Edad Gestacional\n(EG) Hito 1",
                                      L2=="PREV_TRAMO_REC"~ "Previsión y\ntramo",
                                    L2=="PAIS_ORIGEN_REC"~ "País de origen")) %>%
  dplyr::mutate(CATEGORIA= gsub("1.0-13 semanas","1. <=13 semanas",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO","Centro",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO NORTE","Centro norte",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("CENTRO SUR","Centro sur",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("NORTE","Norte",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("SUR","Sur",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  #
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(lab2=glue::glue("{stringr::str_replace(CATEGORIA,' ','\n')}\n({scales::percent(value)})"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#2023-09-24

lcmodel_adj_wo_na$text_label_pre<-paste0("Categoria:",lcmodel_adj_wo_na$CATEGORIA,"<br>%: ",scales::percent(lcmodel_adj_wo_na$value))

zp2a <- ggplot(lcmodel_adj_wo_na,aes(x = L2, y = value, fill = Var2, label=text_label))
zp2a <- zp2a + geom_bar(stat = "identity", position = "stack")
zp2a <- zp2a + facet_grid(Var1 ~ .) 
zp2a <- zp2a + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
zp2a <- zp2a + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Cateorías de\nRespuesta")
zp2a <- zp2a + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp2a <- zp2a + guides(fill = guide_legend(reverse=TRUE))
zp2a <- zp2a + theme(axis.text.x = element_text(angle = 30, hjust = 1))

zp2a

Distribución de categorías del modelo ajustado
Code
ggplotly(zp2a, tooltip= c("text_label"),height=600, width=800)%>% layout(xaxis= list(showticklabel=T))
Code
zp1_tab<-
lcmodel %>%
  dplyr::select(Var1, L2, CATEGORIA, pr, value) %>% 
  tidyr::pivot_wider(id_cols =c(L2,pr,CATEGORIA), names_from = Var1, values_from=value) %>% 
  dplyr::mutate(across(contains("class"),~ sprintf("%01.1f",.*100)))
zp2_tab<-
lcmodel_adj %>%
  dplyr::select(Var1, L2, CATEGORIA, pr, value) %>% 
  tidyr::pivot_wider(id_cols =c(L2,pr,CATEGORIA),  names_from = Var1, values_from=value) %>% 
  dplyr::mutate(across(contains("class"),~ sprintf("%01.1f",.*100)))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  rio::export(list(zp1 = zp1_tab, 
                   zp2_zp1_adj = zp2_tab), "tab_mod_dist_cat.xlsx", overwrite = T)

Regresión logística (poLCA)

Antes, ver relación con resultado de aquellos clasificados en una clase.

Code
cbind.data.frame(mydata_preds3,LCA_best_model_mod$predclass) %>% 
    janitor::tabyl(outcome, `LCA_best_model_mod$predclass`)%>%
    janitor::adorn_percentages("col") %>% 
    dplyr::mutate_if(is.numeric, ~scales::percent(., accuracy=0.1))
 outcome     1     2     3     4     5
       0  6.2%  7.4% 15.1% 20.9%  8.2%
       1 93.8% 92.6% 84.9% 79.1% 91.8%

Con resultado distal

Code
df_model_LCA2 %>% select(starts_with("prob_c")) %>% 
  t() %>% data.table::data.table(keep.rownames=T) %>% 
  dplyr::mutate(rn=gsub("prob_c","",rn)) %>% 
  knitr::kable("markdown", caption="Probabilidad de pertenecer a una clase, según interrupción del embarazo", col.names = c("Clase","No interrumpe", "Interrumpe"))
Probabilidad de pertenecer a una clase, según interrupción del embarazo
Clase No interrumpe Interrumpe
1 0.19(95%CI=0.16,0.23) 0.18(95%CI=0.17,0.19)
2 0.18(95%CI=0.15,0.21) 0.15(95%CI=0.14,0.17)
3 0.3(95%CI=0.26,0.33) 0.39(95%CI=0.37,0.4)
4 0.16(95%CI=0.13,0.19) 0.13(95%CI=0.12,0.14)
5 0.17(95%CI=0.15,0.21) 0.15(95%CI=0.14,0.17)

Análisis de clases latentes, selección de clases, modelo alternativo, sin pueblo originario y año Búsqueda de clases, Análisis secundario

Medidas de ajuste poLCA y glca, combinados

Code
load("data2_lca3_glca_sin_po_ano.RData") # from H:/Mi unidad/Angelica/secreto/IVE/Paso36.Rmd
Code
#rm(list = ls());gc()
tab_ppio %>%#
  dplyr::select(ModelIndex, everything()) %>% 
    dplyr::mutate_if(is.character, as.numeric) %>% 
  cbind.data.frame(dplyr::select(data.frame(bootlrt$gtable), BIC, entropy, Gsq, Boot.p.value)) %>% 
  janitor::clean_names() %>% 
  dplyr::mutate_if(is.numeric, ~round(.,2)) %>% 
  dplyr::select(model_index, bic, a_bic, bic, bic_2, rel_ent, ent_r2, entropy, gsq_2, boot_p_value) -> tab_fit1

tab_fit1 %>% 
  # convert character columns to numeric
    knitr::kable(format="markdown", caption="Medidas de ajuste (dividir por 1000 gsq_2)")
Medidas de ajuste (dividir por 1000 gsq_2)
model_index bic a_bic bic_2 rel_ent ent_r2 entropy gsq_2 boot_p_value
2 45819.78 45683.15 45803.30 0.99 0.98 0.99 2678.89 0
3 45568.50 45361.96 45543.78 0.91 0.89 0.91 2246.33 0
4 45399.87 45123.42 45366.91 0.90 0.89 0.90 1896.42 0
5 45317.18 44970.83 45275.98 0.91 0.89 0.91 1632.46 0
6 45386.28 44970.02 45336.84 0.83 0.80 0.83 1520.28 0
7 45478.47 44992.30 45420.79 0.78 0.76 0.78 1431.19 0
8 45591.83 45035.76 45525.91 0.79 0.76 0.79 1363.28 0
9 45703.82 45077.85 45629.66 0.76 0.73 0.76 1293.99 0
10 45820.84 45124.96 45738.44 0.76 0.72 0.76 1229.74 0
Code
tab_ppio2 %>%#
  dplyr::select(ModelIndex, everything()) %>% 
    dplyr::mutate_if(is.character, as.numeric) %>% 
  cbind.data.frame(dplyr::select(data.frame(bootlrt2$gtable), BIC, entropy, Gsq, Boot.p.value)) %>% 
  janitor::clean_names() %>% 
  dplyr::mutate_if(is.numeric, ~round(.,2)) %>% 
  dplyr::select(model_index, bic, a_bic, bic, bic_2, rel_ent, ent_r2, entropy, gsq_2, boot_p_value) -> tab_fit2

tab_fit2 %>% 
  # convert character columns to numeric
    knitr::kable(format="markdown", caption="Medidas de ajuste (ajustado)")
Medidas de ajuste (ajustado)
model_index bic a_bic bic_2 rel_ent ent_r2 entropy gsq_2 boot_p_value
2 45776.24 45636.43 45759.76 0.99 0.99 0.99 3536.09 0
3 45481.98 45269.09 45457.26 0.89 0.86 0.89 3052.32 0
4 45305.93 45019.95 45272.97 0.91 0.89 0.91 2686.74 0
5 45230.37 44871.31 45189.17 0.92 0.90 0.92 2421.67 0
6 45255.46 44823.32 45195.29 0.86 0.83 0.86 2246.52 0
7 45447.71 44942.48 45259.35 0.88 0.84 0.85 2129.29 0
8 45697.24 45118.93 45333.74 0.83 0.78 0.78 2022.41 0
9 46281.80 45630.41 45416.50 0.85 0.75 0.76 1923.89 0
10 46669.37 45944.89 45534.20 0.95 0.91 0.74 1860.32 0
Code
 rio::export(list(sin_ajustar = tab_fit1, 
                   ajustado = tab_fit2), "polca_glca_fit_table.xlsx", overwrite=T)
 #Masyn is talking about normalized entropy above, which ranges from 0 to 1.
# Masyn, K. (2013). Chapter 25 Latent Class Analysis and Finite Mixture Modeling. The Oxford Handbook of Quantitative Methods. D. Little (eds). https://www.statmodel.com/download/Masyn_2013.pdf
 #Vermunt, J. K., & Magidson, J. (2016). Technical guide for Latent GOLD 5.1: Basic, advanced, and syntax. Belmont, MA: Statistical Innovations Inc.
 #https://www.google.com/url?q=https://www.statisticalinnovations.com/wp-content/uploads/LGtechnical.pdf&sa=D&source=docs&ust=1689531259723207&usg=AOvVaw2N3g_gco8-GUiiPhRQxRoS

Figuras

Code
##https://agscl.github.io/IVE/Paso36.html

lcmodel_glca_wo_na<-
lcmodel_glca %>% 
                 dplyr::filter(!is.na(CATEGORIA)) %>% 
                 dplyr::mutate(class=str_replace(class,"Class","Clase"),
                 L2= dplyr::case_when(var=="CAUSAL"~ "Causal",
                                      var=="EDAD_MUJER_REC"~ "Edad persona\ngestante",
                                      var=="MACROZONA"~ "Macrozona",
                                      var=="HITO1_EDAD_GEST_SEM_REC"~ "Edad Gestacional\n(EG) Hito 1",
                                      var=="PREV_TRAMO_REC"~ "Previsión y\ntramo",
                                    var=="PAIS_ORIGEN_REC"~ "País de origen")) %>%
  dplyr::mutate(CATEGORIA= gsub("1.0-13 semanas","1. <=13 semanas",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO","Centro",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO NORTE","Centro norte",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("CENTRO SUR","Centro sur",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("NORTE","Norte",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("SUR","Sur",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  #
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(lab2=glue::glue("{stringr::str_replace(CATEGORIA,' ','\n')}\n({scales::percent(value)})"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

zp3a <- ggplot(lcmodel_glca_wo_na,aes(x = L2, y = value, fill = factor(pr), label=text_label))
zp3a <- zp3a + geom_bar(stat = "identity", position = "stack")
zp3a <- zp3a + facet_grid(class ~ .) 
zp3a <- zp3a + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
zp3a <- zp3a + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Cateorías de\nRespuesta")
zp3a <- zp3a + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp3a <- zp3a + guides(fill = guide_legend(reverse=TRUE))
zp3a <- zp3a + theme(axis.text.x = element_text(angle = 30, hjust = 1))

# ggplotly(zp3, tooltip = c("text_label"))%>% layout(xaxis= list(showticklabels = T),height=600, width=800)
zp3a

Distribución de categorías del modelo (alternativo)
Code
ggplotly(zp3a, tooltip = c("text_label"),height=600, width=800)%>% layout(xaxis= list(showticklabels = T))
Code
lcmodel_glca_adj_wo_na<-
lcmodel_glca_adj %>% 
                 dplyr::filter(!is.na(CATEGORIA)) %>% 
                 dplyr::mutate(class=str_replace(class,"Class","Clase"),
                 L2= dplyr::case_when(var=="CAUSAL"~ "Causal",
                                      var=="EDAD_MUJER_REC"~ "Edad persona\ngestante",
                                      var=="MACROZONA"~ "Macrozona",
                                      var=="HITO1_EDAD_GEST_SEM_REC"~ "Edad Gestacional\n(EG) Hito 1",
                                      var=="PREV_TRAMO_REC"~ "Previsión y\ntramo",
                                    var=="PAIS_ORIGEN_REC"~ "País de origen")) %>%
  dplyr::mutate(CATEGORIA= gsub("1.0-13 semanas","1. <=13 semanas",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO","Centro",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("CENTRO NORTE","Centro norte",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("CENTRO SUR","Centro sur",CATEGORIA)) %>%
  dplyr::mutate(CATEGORIA= gsub("NORTE","Norte",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("SUR","Sur",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  #
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(CATEGORIA= gsub("25-35","25-34",CATEGORIA)) %>% 
  dplyr::mutate(lab2=glue::glue("{stringr::str_replace(CATEGORIA,' ','\n')}\n({scales::percent(value)})"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

lcmodel_glca_adj_wo_na$text_label<-paste0("Categoria:",lcmodel_glca_adj_wo_na$CATEGORIA,"<br>%: ",scales::percent(lcmodel_glca_adj_wo_na$value))

zp4a <- ggplot(lcmodel_glca_adj_wo_na,aes(x = var, y = value, fill = factor(pr), label=text_label))
zp4a <- zp4a + geom_bar(stat = "identity", position = "stack")
zp4a <- zp4a + facet_grid(class ~ .) 
zp4a <- zp4a + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
zp4a <- zp4a + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Cateorías de\nRespuesta")
zp4a <- zp4a + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp4a <- zp4a + guides(fill = guide_legend(reverse=TRUE))
zp4a <- zp4a + theme(axis.text.x = element_text(angle = 30, hjust = 1))

zp4a

Distribución de categorías del modelo ajustado (alternativo)
Code
ggplotly(zp4a, tooltip = c("text_label"),height=600, width=800)%>% layout(xaxis= list(showticklabels = T))

Regresión logística (no distal)

Code
# 
# average_posterior_probability <- mean(poLCA.posterior(LCA_best_model_mod, LCA_best_model_mod$predclass))
#

# table(LCA_best_model_mod$predclass)
# table(posterior_glca_05_final$final_05)
# table(posterior_glca_05_final$final_05,car::recode(posterior_glca_05_final$final_05,"4=1;3=2;3=3;2=4;5=5;NA=NA"))
# 
# 
# posterior_glca_07_final %>% 
#     rowwise() %>%
#     mutate(count_ones = sum(c_across(starts_with("Class")) == 1)) %>%
#     ungroup() %>% 
#     janitor::tabyl(final_07,count_ones)
mat<-
cbind.data.frame(LCA_best_model_mod$posterior, y=LCA_best_model_mod$predclass) %>% 
dplyr::group_by(y) %>% 
dplyr::summarise(`1`= mean(`1`), `2`= mean(`2`), `3`= mean(`3`), `4`= mean(`4`), `5`= mean(`5`)) %>% 
dplyr::mutate_all(~round(.,2))

paste("mean posterior probabilities: ",
paste(diag(matrix(unlist(mat), nrow=5)[,2:6]),collapse =", "))
[1] "mean posterior probabilities:  0.98, 0.88, 0.98, 0.95, 1"
Code
#_#_#_#_#_#_#_#_#_#_#_

#Classifying by posterior probs.
posterior_glca_05_final<-
best_model_glca$posterior$ALL %>% 
    dplyr::mutate_all(~ifelse(.>.5,1,0)) %>% 
  dplyr::mutate(final_05=dplyr::case_when(`Class 1`==1~1,`Class 2`==1~2, `Class 3`==1~3,`Class 4`==1~4, `Class 5`==1~5))

posterior_polca_05_final<-
LCA_best_model_mod$posterior %>% 
  data.frame() %>% 
  dplyr::mutate_all(~ifelse(.>.5,1,0)) %>% 
  dplyr::mutate(final_05=dplyr::case_when(`X1`==1~1,`X2`==1~2, `X3`==1~3,`X4`==1~4, `X5`==1~5))

posterior_glca_07_final<-
best_model_glca$posterior$ALL %>% 
    dplyr::mutate_all(~ifelse(.>.7,1,0)) %>% 
  dplyr::mutate(final_07=dplyr::case_when(`Class 1`==1~1,`Class 2`==1~2, `Class 3`==1~3,`Class 4`==1~4, `Class 5`==1~5))

posterior_polca_07_final<-
LCA_best_model_mod$posterior %>% 
  data.frame() %>% 
  dplyr::mutate_all(~ifelse(.>.7,1,0)) %>% 
  dplyr::mutate(final_07=dplyr::case_when(`X1`==1~1,`X2`==1~2, `X3`==1~3,`X4`==1~4, `X5`==1~5))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

bd_mydata_preds3_posterior<-
cbind.data.frame(mydata_preds3,final_07=posterior_glca_07_final$final_07,final_05=posterior_glca_05_final$final_05)%>% dplyr::mutate(outcome=ifelse(outcome==1,1,0))%>% 
  dplyr::mutate(final_07= dplyr::case_when(final_07==1~3, final_07==3~1, final_07==2~4, final_07==4~2, final_07==4~1, final_07==1~4, T~final_07)) %>% 
  dplyr::mutate(final_05= dplyr::case_when(final_05==1~3, final_05==3~1, final_05==2~4, final_05==4~2, final_05==4~1, final_05==1~4, T~final_05)) #%>%
 # dplyr::mutate(final_07=car::recode(final_07,"2=1;1=2")) %>% 
#  dplyr::mutate(final_05=car::recode(final_05,"2=1;1=2")) 
  
#janitor::tabyl(final_07) 

bd_mydata_preds3_posterior_polca<-
cbind.data.frame(mydata_preds3,final_07=posterior_polca_07_final$final_07,final_05=posterior_polca_05_final$final_05)%>% dplyr::mutate(outcome=ifelse(outcome==1,1,0)) #%>% 
  # dplyr::mutate(final_07=car::recode(final_07,"2=1;1=2")) %>% 
  # dplyr::mutate(final_05=car::recode(final_05,"2=1;1=2"))

bd_reg2<-
bind_rows(
glm(outcome~ relevel(factor(final_07),ref=2), data= bd_mydata_preds3_posterior)%>% 
    lmtest::coeftest(vcov. = sandwich::vcovHC, type = "HC0") %>% 
            broom::tidy(exponentiate=T, conf.int = T),
glm(outcome~ relevel(factor(final_05),ref=2), data= bd_mydata_preds3_posterior)%>% 
    lmtest::coeftest(vcov. = sandwich::vcovHC, type = "HC0") %>% 
            broom::tidy(exponentiate=T, conf.int = T),
glm(outcome~ relevel(factor(final_07),ref=2), data= bd_mydata_preds3_posterior_polca)%>% 
    lmtest::coeftest(vcov. = sandwich::vcovHC, type = "HC0") %>% 
            broom::tidy(exponentiate=T, conf.int = T),
glm(outcome~ relevel(factor(final_05),ref=2), data= bd_mydata_preds3_posterior_polca)%>% 
    lmtest::coeftest(vcov. = sandwich::vcovHC, type = "HC0") %>% 
            broom::tidy(exponentiate=T, conf.int = T)
) %>% 
  dplyr::select(-std.error, -statistic) %>% 
  dplyr::mutate_at(c("estimate","conf.low","conf.high"),~round(.,2)) %>%
  dplyr::mutate_at(c("p.value"),~round(.,4)) %>% 
  add_column(mod=c(rep("glca",10),rep("polca",10))) %>% 
  add_column(reg=c(rep(">.7 probs",5),rep(">.5 probs",5),rep(">.7 probs",5),rep(">.5 probs",5))) %>% 
  dplyr::select(mod, reg, everything()) %>% 
  dplyr::mutate(output= glue::glue('{sprintf("%.2f",exp(estimate))} ({sprintf("%.2f",exp(conf.low))}; {sprintf("%.2f",exp(conf.high))})')) %>% 
  dplyr::mutate(p.value= ifelse(p.value<0.001,"<0.001",sprintf("%.3f", p.value))) %>% 
  dplyr::select(mod, reg, term, output, p.value) 
Warning: The `exponentiate` argument is not supported in the `tidy()` method for `coeftest` objects and will be ignored.
The `exponentiate` argument is not supported in the `tidy()` method for `coeftest` objects and will be ignored.
The `exponentiate` argument is not supported in the `tidy()` method for `coeftest` objects and will be ignored.
The `exponentiate` argument is not supported in the `tidy()` method for `coeftest` objects and will be ignored.
Code
#cbind.data.frame(mydata_preds3,final_07=posterior_polca_07adj$final_07,final_05=posterior_polca_05adj$final_05) %>% janitor::tabyl(outcome,final_07)
bd_reg2 %>% 
  knitr::kable("markdown",size=9, caption="Regresión logística (x= Clase latente; y= interrumpir)")
Regresión logística (x= Clase latente; y= interrumpir)
mod reg term output p.value
glca >.7 probs (Intercept) 2.56 (2.46; 2.66) <0.001
glca >.7 probs relevel(factor(final_07), ref = 2)1 1.00 (0.95; 1.04) 0.836
glca >.7 probs relevel(factor(final_07), ref = 2)3 0.91 (0.87; 0.96) <0.001
glca >.7 probs relevel(factor(final_07), ref = 2)4 0.86 (0.83; 0.90) <0.001
glca >.7 probs relevel(factor(final_07), ref = 2)5 0.98 (0.94; 1.02) 0.363
glca >.5 probs (Intercept) 2.56 (2.46; 2.66) <0.001
glca >.5 probs relevel(factor(final_05), ref = 2)1 0.99 (0.94; 1.03) 0.606
glca >.5 probs relevel(factor(final_05), ref = 2)3 0.91 (0.87; 0.96) <0.001
glca >.5 probs relevel(factor(final_05), ref = 2)4 0.86 (0.83; 0.90) <0.001
glca >.5 probs relevel(factor(final_05), ref = 2)5 0.98 (0.94; 1.02) 0.363
polca >.7 probs (Intercept) 2.53 (2.48; 2.61) <0.001
polca >.7 probs relevel(factor(final_07), ref = 2)1 1.00 (0.96; 1.05) 0.836
polca >.7 probs relevel(factor(final_07), ref = 2)3 0.91 (0.88; 0.95) <0.001
polca >.7 probs relevel(factor(final_07), ref = 2)4 0.87 (0.84; 0.90) <0.001
polca >.7 probs relevel(factor(final_07), ref = 2)5 0.98 (0.95; 1.02) 0.366
polca >.5 probs (Intercept) 2.53 (2.46; 2.59) <0.001
polca >.5 probs relevel(factor(final_05), ref = 2)1 1.01 (0.97; 1.06) 0.606
polca >.5 probs relevel(factor(final_05), ref = 2)3 0.92 (0.89; 0.96) <0.001
polca >.5 probs relevel(factor(final_05), ref = 2)4 0.87 (0.85; 0.90) <0.001
polca >.5 probs relevel(factor(final_05), ref = 2)5 0.99 (0.96; 1.02) 0.615
Code
bd_reg2 %>% rio::export("tab_reg.xlsx", overwrite=T)
    # knitr::kable("html",size=9, caption="Regresión logística (x= Clase latente; y= interrumpir)") %>% kableExtra::kable_classic()
Code
reg_log<-glm(outcome~ final_07, data= bd_mydata_preds3_posterior %>% mutate(final_07=factor(final_07)))
reg_log<-
glm(outcome~ relevel(factor(final_05),ref=2), data= bd_mydata_preds3_posterior_polca)
ggeffects::ggpredict(reg_log)
$final_05
# Predicted values of outcome

final_05 | Predicted |       95% CI
-----------------------------------
       1 |      0.94 | [0.88, 0.99]
       2 |      0.93 | [0.89, 0.96]
       3 |      0.85 | [0.82, 0.88]
       4 |      0.79 | [0.78, 0.81]
       5 |      0.92 | [0.89, 0.95]

attr(,"class")
[1] "ggalleffects" "list"        
attr(,"model.name")
[1] "reg_log"
Code
model3ff_reg_rec <- emmeans(glm(outcome~ factor(final_05), data= bd_mydata_preds3_posterior_polca), "final_05")
pairs(model3ff_reg_rec, type = "response")#, type = "lp")
 contrast estimate     SE  df z.ratio p.value
 1 - 2     0.01171 0.0333 Inf   0.352  0.9967
 1 - 3     0.08928 0.0328 Inf   2.723  0.0507
 1 - 4     0.14682 0.0296 Inf   4.960  <.0001
 1 - 5     0.02020 0.0323 Inf   0.625  0.9711
 2 - 3     0.07757 0.0235 Inf   3.295  0.0087
 2 - 4     0.13510 0.0189 Inf   7.166  <.0001
 2 - 5     0.00849 0.0229 Inf   0.371  0.9960
 3 - 4     0.05753 0.0180 Inf   3.200  0.0120
 3 - 5    -0.06908 0.0221 Inf  -3.119  0.0156
 4 - 5    -0.12662 0.0171 Inf  -7.413  <.0001

P value adjustment: tukey method for comparing a family of 5 estimates 
Code
mydata_preds3 %>% 
    group_by(CAUSAL, PREV_TRAMO_REC, HITO2_DECISION_MUJER_IVE) %>% 
    summarise(n=n()) %>%
    dplyr::ungroup() %>% 
    group_by(CAUSAL, PREV_TRAMO_REC) %>% 
    dplyr::summarise(ive=sum(n[HITO2_DECISION_MUJER_IVE=="INTERRUMPIR EL EMBARAZO"]), perc= scales::percent(ive/sum(n))) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(CAUSAL= case_when(CAUSAL==2~ "Causal 1", CAUSAL==3~ "Causal 2", CAUSAL==4~ "Causal 3")) %>% 
  dplyr::mutate(PREV_TRAMO_REC= case_when(PREV_TRAMO_REC==1~ "NA", PREV_TRAMO_REC==2~ "ISAPRE o FFAA", PREV_TRAMO_REC==3~ "FONASA A/B", PREV_TRAMO_REC==4~ "FONASA C/D", PREV_TRAMO_REC==5~ "NINGUNA")) %>% 
  dplyr::group_by(CAUSAL) %>% 
  dplyr::mutate(perc_causal=scales::percent(ive/sum(ive))) %>% 
  dplyr::select(CAUSAL, PREV_TRAMO_REC, perc_causal, perc, ive) %>% 
  knitr::kable("markdown", caption="Distribución causal, previsión e IVE", col.names=c("Causal","Previsión y Tramo", "% de la previsión en la causal", "% de IVE para cada previsión por causal", "n"))
`summarise()` has grouped output by 'CAUSAL', 'PREV_TRAMO_REC'. You can
override using the `.groups` argument.
`summarise()` has grouped output by 'CAUSAL'. You can override using the
`.groups` argument.
Distribución causal, previsión e IVE
Causal Previsión y Tramo % de la previsión en la causal % de IVE para cada previsión por causal n
Causal 1 NA 0.3% 100% 3
Causal 1 ISAPRE o FFAA 9.3% 91% 90
Causal 1 FONASA A/B 55.3% 80% 535
Causal 1 FONASA C/D 31.9% 84% 309
Causal 1 NINGUNA 3.2% 97% 31
Causal 2 NA 0.1% 33% 2
Causal 2 ISAPRE o FFAA 20.8% 92% 321
Causal 2 FONASA A/B 47.4% 78% 731
Causal 2 FONASA C/D 30.4% 81% 469
Causal 2 NINGUNA 1.2% 90% 18
Causal 3 NA 0.74% 100% 5
Causal 3 ISAPRE o FFAA 5.93% 98% 40
Causal 3 FONASA A/B 66.47% 90% 448
Causal 3 FONASA C/D 20.03% 96% 135
Causal 3 NINGUNA 6.82% 98% 46
Code
#https://github.com/cran/effects/blob/master/R/effectspoLCA.R
#https://martinpaladino.xyz/2018/11/27/an%C3%A1lisis-de-clases-latentes-ii/
#The effects-plots (or also the numeric output) give you the predicted values of the outcome for certain given values for the predictors (independent variables). It just "inserts" the value of a predictor into the model formula. Since you calculate the effect for one predictor at time, the other predictors are "hold constant", i.e. their regression coefficients are not ignored, but - by default - their mean value is chosen.

#https://stats.stackexchange.com/questions/362414/interpretation-of-regression-coefficients-in-latent-class-regression-using-polc?rq=1
#https://stats.stackexchange.com/questions/362414/interpretation-of-regression-coefficients-in-latent-class-regression-using-polc
Code
# Graph of poLCA object
poLCA.means <- function(fit,ORDERING) {
    t <- table(fit$predclass)
    n <- length(fit$P) ; N = seq(1:n)
    Class <- character()
    NLevels <- dim(p$y)[2]
    
    for (i in 1 : n) {
        Class[i] <- paste('Class ',i,' (n=',t[i],')',sep='')
    }

    CL <- tibble(N,Class)

    fit.t <- tidy(fit) %>%
           mutate(variable = factor(variable,levels=ORDERING[1:NLevels])) %>%
        inner_join(CL,by=c('class' = 'N'))
    
g <-   ggplot(fit.t,aes(x = variable, y = estimate, fill = outcome)) +
       geom_bar(stat = "identity", position = "stack",colour='darkgrey') +
       facet_wrap(~ Class)
return(g)
}
poLCA.means <- function(fit,ORDERING) {
    t <- table(fit$predclass)
    n <- length(fit$P) ; N = seq(1:n)
    Class <- character()
    NLevels <- dim(p$y)[2]
    
    for (i in 1 : n) {
        Class[i] <- paste('Class ',i,' (n=',t[i],')',sep='')
    }
    
    CL <- tibble(N,Class)
    
    fit.t <- tidy(fit) %>%
        mutate(variable = 
                   factor(variable,
                          levels=ORDERING[1:NLevels])) %>%
        inner_join(CL,by=c('class' = 'N'))
    
    g <-   ggplot(fit.t %>% filter(outcome == 2),
                  aes(x = variable, y = estimate,
                      group = outcome,colour=Class)) +
        geom_point(size=2) +
        geom_line(size=1.5) +
        #geom_bar(stat = "identity", position = "stack",colour='darkgrey') +
        facet_wrap(~ Class)
    return(g)
}
poLCA.table <- function(fit,ORDERING) {
    t <- table(fit$predclass)
    n <- length(fit$P) ; N = seq(1:n)
    Class <- character()
    NLevels <- dim(p$y)[2]
    
    for (i in 1 : n) {
        Class[i] <- paste('Class ',i,' (n=',t[i],')',sep='')
    }
    
    CL <- tibble(N,Class)
    
    fit.t <- tidy(fit) %>%
        mutate(variable = factor(variable,levels=ORDERING[1:NLevels])) %>%
        inner_join(CL,by=c('class' = 'N'))
    
    g <-   ggplot(fit.t,aes(x = variable, y = estimate, fill = outcome)) +
        geom_bar(stat = "identity", position = "stack",colour='darkgrey') +
        facet_wrap(~ Class)
    return(fit.t)
}
ReturnCoeffLCA <- function(x) {
R <- length(x$P)
length <- length(x$coeff)

dimensions <- dim(x$coeff)
    height <- dimensions[1]
    width <- dimensions[2]

names <- dimnames(x$coeff)[[1]]
    names <- rep(names,width)
    
form <- as.character(x$call$formula)
    form <- rep(form, length)
    
Coeff <- as.vector(x$coeff) # L elements
Coeff.se <- as.vector(x$coeff.se) # L elements

tval <- Coeff/Coeff.se
pval <- 1 - 3*abs(pt(tval,x$resid.df)-0.5)

comp = vector()

for (w in 1:width) {
for (h in 1:height) {
    comp <- c(comp, paste0(w+1,'/1'))
    #cat(comp)
        }
    }
#comp
disp <- tibble(form,names,comp,Coeff,Coeff.se,tval,pval)
names(disp) <- c('Formula', 'Variables', 'Groups', 'Coeff', 'SE', 't', 'p-value')


return(disp)
}

#g <- poLCA.means(d,ORDERING) 
#g <- g + theme_bw(base_size=12) +
#       theme(axis.text.x = element_text(angle = -90, hjust=0,vjust = 0.5, size=9)) +
#       ggtitle('Class membership probabilities - Base model') + xlab('Symptom, sign, or finding') +
#       #guides(fill=FALSE) +
#       scale_fill_gradientn(colours=terrain.colors(2))


#Convert logOdds to probability
Prob <- function(logOdds) {
    return((exp(logOdds))/(1+ exp(logOdds)))
}

poLCA.coeff.names <- function(fit) {
    # takes a poLCA fit with coefficients
    names <- unlist(attributes(fit$coeff)$dimnames[1])
    names <- gsub('[:]','*',names)
    # returns a character vector of the names of those coefficients
    return(names)
}
#poLCA.coeff.names(poLCA.PCV2.PPV2)

# Crude CI's for poLCA
poLCA.CI <- function(fit,Z=1.96) {
    coef <- data.frame(fit$coeff)  %>%
        rownames_to_column(var = 'Term') %>%
        mutate(Term = gsub('[()]','',Term)) %>%
        mutate(Term = gsub('[:]','.',Term)) 

    # The rest of the columns are called X1, X2 and so on.
    # We want to change these to Class_2:1 Class_3:1 etc.
    NN <- names(coef)[-1] # X1, X2, X3, ...
    NN <- gsub('X','',NN) # '1', '2', '3'', ...
    NN <- as.numeric(NN) + 1 # 2, 3, 4, ...
    NN <- paste('Class_',NN,':1',sep='') # Class_2:1, Class_3:1
    names(coef) <- c('Term',NN)

    coef <- coef %>%
        gather(Class,Value,-Term)
    coef

    se   <- data.frame(fit$coeff.se)  %>%
        rownames_to_column(var = 'Term') %>%
        mutate(Term = gsub('[()]','',Term)) %>%
        mutate(Term = gsub('[:]','.',Term)) 
    names(se) <- c('Term',NN)
    se <- se %>%
        gather(Class,Value,-Term)
    se
    
    coef <- coef %>%
        inner_join(se, by=c('Term','Class'),
                   suffix=c('.coef','.se'))

    coef <- coef %>%
        mutate(LowerCI = Value.coef-Z*Value.se) %>%
        mutate(UpperCI = Value.coef+Z*Value.se) %>%
        mutate(OR = exp(Value.coef)) %>%
        mutate(OR.Lr = exp(LowerCI)) %>%
        mutate(OR.Upr = exp(UpperCI)) %>%
        select(-LowerCI,-UpperCI)

return(coef)
}
#d <- poLCA.CI(poLCA.PCV2.PPV2)

#kable(d)
#poLCA.CI(LCA_best_model_adj_mod)


# este es con resultado distal (sensibilidad)
# 
# Probabilidad de pertenecer a una clase, según interrupción del embarazo
# Clase No interrumpe           Interrumpe
# 1 0.19(95%CI=0.16,0.23)   0.18(95%CI=0.17,0.19)
# *2    0.18(95%CI=0.15,0.21)   0.15(95%CI=0.14,0.17)* ---> estas son las cuicas, también 
# *3    0.3(95%CI=0.26,0.33)    0.39(95%CI=0.37,0.4)*
# 4 0.16(95%CI=0.13,0.19)   0.13(95%CI=0.12,0.14)
# 5 0.17(95%CI=0.15,0.21)   0.15(95%CI=0.14,0.17)
Code
# extracting class-conditional probabilities
lc<-LCA_best_model_mod
R <- length(lc$P)
probs <- lc$probs
P <- lc$P
y <- lc$y
ti <- lc$ti
R <- nrow(probs[[1]])
pi.class <- matrix(NA,nrow=length(probs),ncol=R)
        for (j in 1:length(probs)) 
        pi.class[j,] <- probs[[j]][,2]
dimnames(pi.class) <- list(names(y),round(P,3))
#rownames(pi.class) <- row.names

probs.se <- lc$probs.se
se.class <- matrix(NA,nrow=length(probs.se),ncol=R)
        for (j in 1:length(probs.se)) 
        se.class[j,] <- probs.se[[j]][,2]
dimnames(se.class) <- list(names(y),round(P,3))
#rownames(se.class) <- row.names

ds.plot <- data.frame(Classes=as.vector(col(pi.class)),Manifest.variables=as.vector(row(pi.class)),value=as.vector(pi.class))

ds.plot.stata <- data.frame(Classes=as.vector(col(pi.class)),Manifest.variables=as.vector(row(pi.class)), value=as.vector(pi.class),names=rownames(pi.class), se=as.vector(se.class))

ds.plot.stata %>% tidyr::pivot_wider(names_from=Classes, values_from=c("value","se")) %>% 
    dplyr::mutate_if(is.numeric, ~round(.,3))

ds.plot.stata %>% tidyr::pivot_wider(names_from=Classes, values_from=c("value","se")) %>% 
    dplyr::mutate_if(is.numeric, ~round(.,3)) %>% 
    dplyr::mutate(c1= paste0(value_1*100," (",se_1*100,")")) %>% 
    dplyr::mutate(c2= paste0(value_2*100," (",se_2*100,")")) %>%
    dplyr::mutate(c3= paste0(value_3*100," (",se_3*100,")")) %>% 
    dplyr::mutate(c4= paste0(value_4*100," (",se_4*100,")")) %>% 
    dplyr::mutate(c5= paste0(value_5*100," (",se_5*100,")")) %>% 
  dplyr::select(Manifest.variables, names, paste0("c",1:5))

#https://datascience.stackexchange.com/questions/40632/how-do-i-use-the-model-generated-by-the-r-package-polca-to-classify-new-data-as

#no sale, tira errores por el poLCAparallel
Code
#https://cran.r-project.org/web/packages/flexmix/vignettes/regression-examples.pdf
#https://cran.r-project.org/web/packages/flexmix/vignettes/mixture-regressions.pdf
#https://cran.r-project.org/web/packages/flexmix/vignettes/bootstrapping.pdf
require(flexmix)
#2023-
mydata_preds3_reg<-
  mydata_preds3 %>% 
    dplyr::group_by(CAUSAL, EDAD_MUJER_REC, MACROZONA, HITO1_EDAD_GEST_SEM_REC, PREV_TRAMO_REC, PAIS_ORIGEN_REC) %>% 
    dplyr::summarise(success= sum(as.numeric(outcome)-1, na.rm=T),total= n(),failure= total- success) %>% 
    data.table::data.table() %>% 
    data.frame()
#table_homolog
require(flexmix)

#t. The general form is y~x|g where y is the response, x the set of predictors and g an optional grouping factor for repeated measurements.
# k= Number of clusters (not needed if cluster is specified).
# 
set.seed(2125)
flexmix_fit <- stepFlexmix(cbind(success, failure) ~ CAUSAL+ EDAD_MUJER_REC+ MACROZONA+ HITO1_EDAD_GEST_SEM_REC+ PREV_TRAMO_REC+ PAIS_ORIGEN_REC, 
      data =  mydata_preds3_reg,
      model = FLXMRglmfix(family = "binomial"),
      k = 1:9, 
      nrep = 2.5e3, #1.5? probar
      control = list(iter = 5e4, tol = 1e-8) #, minprior = 0.2 not recommended
      )
try(getModel(ArtEx.fit, "BIC"))
getModel(flexmix_fit, "BIC")
invisible("Modelo XXX, mejor ajuste. ")

#predict latent class membership
predictions <- clusters(fit)
table(predictions)
#posterior probabilities
posterior_probs <- posterior(fit)
head(posterior_probs)

Regresión logística (distal)

Code
invisible("2023-09-27: para obtener las probabildiades")
require(glca)
Loading required package: glca
Warning: package 'glca' was built under R version 4.1.3
Code
df_best_model_glca_w_distal_outcome<-
  data.frame(coef(best_model_glca_w_distal_outcome)) %>% rownames_to_column("term") %>% janitor::clean_names() %>% 
    rownames_to_column("rowname") %>%
    gather(key = "key", value = "value", -rowname) %>%
    spread(key = "rowname", value = "value") %>% 
    set_names(as.character(unlist(tail(., 1)))) %>% 
    slice(-n()) %>% 
    dplyr::mutate(term=strsplit(sub('^(.*?_.*?_.*?)_(.*)$', '\\1,\\2', term), ',')) %>% 
separate(col=term,into = c("prefix", "suffix"),  sep = ", ", extra = "merge") %>% 
  dplyr::mutate(across(c("prefix", "suffix"), ~gsub('\\(|"|\\)', "", .))) 
Class 1 / 5 :
            Odds Ratio Coefficient  Std. Error  t value  Pr(>|t|)    
(Intercept)     2.9257      1.0735      0.3718    2.887  0.003907 ** 
outcome1        0.2857     -1.2527      0.3355   -3.734  0.000191 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Class 2 / 5 :
            Odds Ratio Coefficient  Std. Error  t value  Pr(>|t|)  
(Intercept)     0.4006     -0.9149      0.5006   -1.828    0.0677 .
outcome1        0.7858     -0.2410      0.4769   -0.505    0.6133  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Class 3 / 5 :
            Odds Ratio Coefficient  Std. Error  t value  Pr(>|t|)    
(Intercept)    18.4092      2.9128      0.3577    8.142  5.26e-16 ***
outcome1        0.1830     -1.6981      0.3147   -5.396  7.26e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Class 4 / 5 :
            Odds Ratio Coefficient  Std. Error  t value  Pr(>|t|)  
(Intercept)     1.9386      0.6620      0.3775    1.754    0.0796 .
outcome1        0.5676     -0.5664      0.3464   -1.635    0.1021  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
df_best_model_glca_w_distal_outcome2<-
df_best_model_glca_w_distal_outcome%>%
  dplyr::filter(suffix == "coefficient" | suffix == "std_error") %>%
  pivot_wider(names_from=suffix, values_from = c("(Intercept)", "outcome1")) %>%
  dplyr::mutate_at(2:5, ~as.numeric(.)) %>% 
  dplyr::mutate(
    lower_log_or_int = `(Intercept)_coefficient` - 1.96 * `(Intercept)_std_error`,
    upper_log_or_int = `(Intercept)_coefficient` + 1.96 * `outcome1_std_error`,
    lower_log_or_comp = `outcome1_coefficient` - 1.96 * `outcome1_std_error`,
    upper_log_or_comp = `outcome1_coefficient` + 1.96 * `outcome1_std_error`) %>%
  dplyr::rename("int_coef"="(Intercept)_coefficient", "int_std_error"="(Intercept)_std_error", "comp_coef"="outcome1_coefficient","comp_std_error"="outcome1_std_error") %>% 
  dplyr::select(prefix,#t_coef int_std_error comp_coef comp_std_error 
                int_coef, int_std_error, comp_coef, comp_std_error, 
                lower_log_or_int, upper_log_or_int, lower_log_or_comp, upper_log_or_comp)

# List of call classes
call_classes <- unique(df_best_model_glca_w_distal_outcome2$prefix)

result2 <- map_df(call_classes, ~ {
  df_best_model_glca_w_distal_outcome2 %>%
    dplyr::mutate(int_comp_coef=int_coef+comp_coef) %>% 
    dplyr::summarise(call_class = .x,
                     nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                     den = sum(exp(int_comp_coef)),
                     prob = nom/(1+den))
})

result2_lo <- map_df(call_classes, ~ {
    df_best_model_glca_w_distal_outcome2 %>%
        dplyr::mutate(int_comp_coef=lower_log_or_int+lower_log_or_comp) %>% 
        dplyr::summarise(call_class = .x,
                         nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                         den = sum(exp(int_comp_coef)),
                         prob = nom/(1+den))
})

result2_hi <- map_df(call_classes, ~ {
    df_best_model_glca_w_distal_outcome2 %>%
        dplyr::mutate(int_comp_coef=upper_log_or_int+upper_log_or_comp) %>% 
        dplyr::summarise(call_class = .x,
                         nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                         den = sum(exp(int_comp_coef)),
                         prob = nom/(1+den))
})


result2b <- map_df(call_classes, ~ {
  df_best_model_glca_w_distal_outcome2 %>%
    dplyr::mutate(int_comp_coef=int_coef) %>% 
    dplyr::summarise(call_class = .x,
                     nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                     den = sum(exp(int_comp_coef)),
                     prob = nom/(1+den))
})

result2b_lo <- map_df(call_classes, ~ {
    df_best_model_glca_w_distal_outcome2 %>%
        dplyr::mutate(int_comp_coef=lower_log_or_int) %>% 
        dplyr::summarise(call_class = .x,
                         nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                         den = sum(exp(int_comp_coef)),
                         prob = nom/(1+den))
})

result2b_hi <- map_df(call_classes, ~ {
    df_best_model_glca_w_distal_outcome2 %>%
        dplyr::mutate(int_comp_coef=upper_log_or_int) %>% 
        dplyr::summarise(call_class = .x,
                         nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
                         den = sum(exp(int_comp_coef)),
                         prob = nom/(1+den))
})

df_lca40522_probs<-
cbind.data.frame(
est=c(result2$prob, 1/(1+unique(result2$den))),
lo=c(result2_lo$prob, 1/(1+unique(result2_lo$den))),
hi=c(result2_hi$prob, 1/(1+unique(result2_hi$den))),
est_int=c(result2b$prob, 1/(1+unique(result2b$den))),
lo_int=c(result2b_lo$prob, 1/(1+unique(result2b_lo$den))),
hi_int=c(result2b_hi$prob, 1/(1+unique(result2b_hi$den))))*100


df_lca40522_probs %>% 
  knitr::kable("markdown", caption="Tabla de probabilidades de LCA con resultado distal (int: no interrumpe)", digits=1)
Tabla de probabilidades de LCA con resultado distal (int: no interrumpe)
est lo hi est_int lo_int hi_int
12.6 8.6 14.2 11.9 11.2 12.4
4.8 1.9 9.3 1.6 1.2 2.2
50.9 37.2 52.6 74.6 72.4 74.8
16.6 11.0 19.4 7.9 7.3 8.4
15.1 41.3 4.5 4.1 7.9 2.2
Code
# Table: Tabla de probabilidades de LCA con resultado distal (int: no interrumpe)
# 
# |   est|    lo|    hi| est_int| lo_int| hi_int|
# |-----:|-----:|-----:|-------:|------:|------:|
# | 0.126| 0.086| 0.142|   0.119|  0.112|  0.124|
# | 0.048| 0.019| 0.093|   0.016|  0.012|  0.022|
# | 0.509| 0.372| 0.526|   0.746|  0.724|  0.748|
# | 0.166| 0.110| 0.194|   0.079|  0.073|  0.084|
# | 0.151| 0.413| 0.045|   0.041|  0.079|  0.022|


Información de la sesión

Code
require(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))